
*Input files: CP_yyyy.dta for yyyy = 2004-2019.
*Output files: CP_merged.dta and PriceAggregation/CP_AGG_yyyy.dta.
*Module-level welfare-relevant inflation measures and chained indices are computed and then aggregated across modules with Cobb-Douglas weights.


use "Stata_intermediate\stataMonthly\CP_2004.dta",clear
forvalues v = 2005(1)2019 {
	append using "Stata_intermediate\stataMonthly\CP_`v'.dta"
   } 
** Delete magnet 
gen magnet = 0
replace magnet = 1 if product_module_code < 470 & product_module_code > 444
replace magnet = 1 if product_module_code == 750
drop if magnet == 1 
egen time = group(yqVal)
egen upcs = group(upc upc_ver_uc)
xtset upcs time
gen dlogp = log(price) - log(l.price)
** Sample selection
replace dlogp = 0 if time == 1
drop if dlogp >1 | dlogp <-1
drop magnet	time	upcs	dlogp

save "Stata_intermediate\stataMonthly\CP_merged.dta",replace


forvalues t = 2004(1)2019 {
	use "Stata_intermediate\stataMonthly\CP_merged.dta",clear
	drop if year < `t'
	duplicates tag upc upc_ver_uc, g(n_ym) 
	replace n_ym = (n_ym+1)/12
	local tl = 2019-`t' + 1
	local gap = (2019-`t' + 1)*12 -6
	keep if n_ym == `tl'
	bysort product_module_code yqVal: egen  double weight_tmp = total(pfsum_actualPaid)
	bysort yqVal: egen  double cobb_weight_tmp = total(pfsum_actualPaid)

	gen  double weight = pfsum_actualPaid/weight_tmp
	gen  double cobb_weight = weight_tmp /cobb_weight_tmp
	
	drop if weight == 1
	drop weight_tmp	cobb_weight_tmp n_ym
	
	** Chained Index
	egen time = group(yqVal) 
	egen upcs = group(upc upc_ver_uc)
	duplicates drop time upcs,force
	xtset upcs time
	gen p_ratio = (f.price/price)
	gen LAS_tmp = (weight)*p_ratio
	gen PASS_tmp = (f.weight)*(1/p_ratio)

	bysort time product_module_code : egen p_LAS_monthly = total(LAS_tmp)
	bysort time product_module_code : egen p_PASS_monthly_INV = total(PASS_tmp)
	gen p_PASS_monthly = 1/p_PASS_monthly_INV 
	gen p_FIS_monthly = sqrt(p_PASS_monthly*p_LAS_monthly)	

	**Log version
	xtset upcs time
	gen dlogp_rev = log(f.price/price)
	gen dlogLAS_tmp = (weight)*dlogp_rev
	gen dlogPASS_tmp = (f.weight)*(dlogp_rev)

	bysort time product_module_code : egen dlog_LAS_monthly = total(dlogLAS_tmp)
	bysort time product_module_code : egen dlog_PASS_monthly = total(dlogPASS_tmp)
	* TORN
	xtset upcs time
	gen  dlogTOR_tmp = (1/2)*(f.weight+weight)*dlogp_rev
	bysort time product_module_code : egen dlogTOR_monthly = total(dlogTOR_tmp)

	
	**For EV and CV: price at end of the year
	gen ptend_tmp = 0
	replace ptend_tmp = price if yqVal == 201906
	gen wend_tmp = 0
	replace wend_tmp = weight if yqVal == 201906

	bysort upcs :egen ptend = total(ptend_tmp)
	bysort upcs :egen wend = total(wend_tmp)

	drop ptend_tmp wend_tmp
	
	gen dloga = (log(price)-log(ptend))
	bysort  product_module_code yqVal:egen E_dloga = total(dloga*wend)


	gen EV_tmp = (wend*(price/ptend)^(1-4.5))
*	bysort product_module_code yqVal:egen dlog_EV_long_ave = total(wend*dlog_EV_long)
	** for covariance
	bysort product_module_code yqVal:egen EV_tmp2 = total(EV_tmp)
*	gen diff_dlog_EV_long = dlog_EV_long - dlog_EV_long_ave
	gen dlogEV = log(EV_tmp2^(-1/(1-4.5)))


	gen CV_tmp = (weight*(ptend/price)^(1-4.5))
	bysort  product_module_code yqVal:egen CV_tmp2 = total(CV_tmp)
	gen dlogCV = log(CV_tmp2^(1/(1-4.5)))
	drop EV_tmp EV_tmp2 CV_tmp CV_tmp2

* 6.5 version

	gen EV_tmp = (wend*(price/ptend)^(1-6.5))
	bysort  product_module_code yqVal:egen EV_tmp2 = total(EV_tmp)
	gen dlogEV65 = log(EV_tmp2^(-1/(1-6.5)))

	gen CV_tmp = (weight*(ptend/price)^(1-6.5))
	bysort  product_module_code yqVal:egen CV_tmp2 = total(CV_tmp)
	gen dlogCV65 = log(CV_tmp2^(1/(1-6.5)))
	drop EV_tmp EV_tmp2 CV_tmp CV_tmp2

* 2.5 version

	gen EV_tmp = (wend*(price/ptend)^(1-2.5))
	bysort  product_module_code yqVal:egen EV_tmp2 = total(EV_tmp)
	gen dlogEV25 = log(EV_tmp2^(-1/(1-2.5)))

	gen CV_tmp = (weight*(ptend/price)^(1-2.5))
	bysort  product_module_code yqVal:egen CV_tmp2 = total(CV_tmp)
	gen dlogCV25 = log(CV_tmp2^(1/(1-2.5)))
	drop EV_tmp EV_tmp2 CV_tmp CV_tmp2

	* Module level Loop
	gen p_LAS_CUM = 1 if yqVal == 201906
	gen p_PASS_CUM = 1 if yqVal == 201906
	gen p_FIS_CUM = 1 if yqVal == 201906
	gen dlog_LAS_CUM = 0 if yqVal == 201906
	gen dlog_PASS_CUM = 0 if yqVal == 201906
	gen dlog_TOR_CUM = 0 if yqVal == 201906

	
	xtset upcs time
	forvalues tloop = 2/`gap' {
		local tloop_local = `gap'-`tloop' + 1
	replace p_LAS_CUM = f.p_LAS_CUM*p_LAS_monthly if `tloop_local' == time
	replace p_PASS_CUM = f.p_PASS_CUM*p_PASS_monthly  if `tloop_local' == time
	replace p_FIS_CUM = f.p_FIS_CUM*p_FIS_monthly  if `tloop_local' == time
	replace dlog_LAS_CUM = f.dlog_LAS_CUM + dlog_LAS_monthly  if `tloop_local' == time
	replace dlog_PASS_CUM = f.dlog_PASS_CUM + dlog_PASS_monthly  if `tloop_local' == time
	replace dlog_TOR_CUM = f.dlog_TOR_CUM + dlogTOR_monthly  if `tloop_local' == time
        } 
	drop dlogTOR_tmp dlogLAS_tmp dlogPASS_tmp LAS_tmp PASS_tmp
		gen p_PASS_CUM_NET = p_PASS_CUM-1
		gen p_LAS_CUM_NET = p_LAS_CUM-1
		gen p_FIS_CUM_NET = p_FIS_CUM-1	

	
***	**Cobb Aggregation
** Assume a Cobb-Douglas utility function as the module-level.
** Initial Taste, Final Tastes, and Chained are all aggregated in Cobb-Douglas (Constant Weight).
** For Final Tastes, we use the weight of t1 (=201906) and the remaining weight of t0 for all periods.
	duplicates drop time product_module_code,force	
	gen cobb_weight_end_tmp  = 0 
	replace cobb_weight_end_tmp = cobb_weight if yqVal == 201906
	bysort product_module_code :egen cobb_weight_end = total(cobb_weight_end_tmp)	
	
	bysort time : egen p_LAS_CUM_AGG = total(p_LAS_CUM_NET*cobb_weight)
	bysort time : egen p_PASS_CUM_AGG = total(p_PASS_CUM_NET*cobb_weight)
	bysort time : egen p_FIS_CUM_AGG = total(p_FIS_CUM_NET*cobb_weight)
	bysort time : egen dlog_LAS_CUM_AGG = total(dlog_LAS_CUM*cobb_weight)
	bysort time : egen dlog_PASS_CUM_AGG = total(dlog_PASS_CUM*cobb_weight)	
	bysort time : egen dlog_TOR_CUM_AGG = total(dlog_TOR_CUM*cobb_weight)	
	*CV and EV
	bysort time : egen dlogEV_AGG = total(dlogEV*cobb_weight_end)	
	bysort time : egen dlogCV_AGG = total(dlogCV*cobb_weight)	
	bysort time : egen dlogEV65_AGG = total(dlogEV65*cobb_weight_end)	
	bysort time : egen dlogCV65_AGG = total(dlogCV65*cobb_weight)	
	bysort time : egen dlogEV25_AGG = total(dlogEV25*cobb_weight_end)	
	bysort time : egen dlogCV25_AGG = total(dlogCV25*cobb_weight)	

	duplicates drop time,force

	keep p_LAS_CUM_AGG p_PASS_CUM_AGG p_FIS_CUM_AGG dlog_LAS_CUM_AGG dlog_PASS_CUM_AGG dlog_TOR_CUM_AGG yqVal	year	month time dlogEV_AGG dlogCV_AGG dlogEV65_AGG dlogCV65_AGG dlogEV25_AGG dlogCV25_AGG
	
	save "Stata_intermediate/PriceAggregation/CP_AGG_`t'.dta",replace
}
